********************************************************************************
******                        Read in the data                        **********

* Start from an empty dataset, switch to the project folder, and load the monthly file
clear
cd "C:\Users\worri\Dropbox\Poverty\COVID19"
use Monthly_Poverty_0520.dta

* Year-month key in YYYYMM form; observations before January 2020 are discarded
generate yr_month = year*100 + month
drop if yr_month < 202001

* Recode asecflag: the value 2 becomes missing
replace asecflag = . if asecflag == 2

* Drop variables treated as irrelevant for this analysis
drop month hflag qinc* qoinc* cpsid cpsidp ///
     durunemp whyunemp whyabsnt wnlook wkstat wksworkorg ///
     occ ind classwkr earnwt earnweek

********************************************************************************
******   Construct a family unit variable for Poverty measures, famnum    ******
** For the poverty measure, a primary family and its related subfamilies are considered one family

/* Assign 1 to primary + related families (ftype 1 or 3); everyone else starts at 0 */
gen famnum = ftype==1|ftype==3

/* Assign 2-4 to unrelated subfamilies (ftype==4) */
* Each subfamily has a reference person (famrel==1)
//tab famrel if ftype==4
* Count the subfamilies in a household (max 3 in this sample period)
bysort yr_month asecflag serial: egen tot_subfam = total(ftype==4 & famrel==1)
tab tot_subfam if ftype==4
* Running count of reference persons within household x ftype.
* pernum is added as a tie-breaker: Stata's sort is not stable, so without it the
* order of rows that share the same famrel (and hence the numbering of the reference
* persons) could change from one run to the next.
* (assumes pernum is unique within yr_month asecflag serial - TODO confirm)
bysort yr_month asecflag serial ftype (famrel pernum): gen subfamnum = sum(famrel==1)
tab subfamnum if ftype==4
* NOTE(review): rows are ordered by famrel, so all reference persons come before the
* other subfamily members. In a household with more than one unrelated subfamily,
* every non-reference member therefore receives the highest running count, i.e. is
* attached to the last subfamily. Confirm this is intended (ordering by pernum alone
* may have been meant).
* Assign 2-4 to the 1st-3rd unrelated subfamily
replace famnum = subfamnum+1 if ftype==4
drop subfamnum


/* Assign 5-20 to non-family members (famrel==0) */
* Running count of non-family members within a household (max 16 in this sample period)
bysort yr_month asecflag serial: generate non_fam = sum(famrel == 0)
tabulate non_fam
* The k-th non-family member gets famnum k+4, i.e. 5-20 for the 1st-16th member,
* so each of them forms a one-person unit
replace famnum = non_fam + 4 if famrel == 0
drop non_fam


* Demographic variables that determine the poverty threshold
* 1) family size: number of persons sharing the same famnum within a household-month
bysort yr_month asecflag serial famnum: generate fam_size = _N
* 2) number of related children under 18 (relate>=301 excludes the HH head and spouse)
bysort yr_month asecflag serial famnum: egen nchild = total(age < 18 & relate >= 301)
* 3) indicator: the head (relate==101) is elderly (65 or older)
bysort yr_month asecflag serial famnum: egen head_elderly = max(age >= 65 & relate == 101)
* 4) in the few units where every member counts as a child, treat one member as the adult
replace nchild = nchild - 1 if nchild == fam_size


* Attach the ASEC poverty thresholds to the matching demographic cell of the Monthly CPS;
* only records found in both files are kept
merge m:1 year fam_size nchild head_elderly using pov_thresh, keep(match) nogenerate

* Attach the household-level file (impute) to the individual-level records;
* again only matched records are kept
merge m:1 yr_month asecflag serial using impute, keep(match) nogenerate


********************************************************************************
* Flag records whose family income is missing (faminc codes 995-999) or CPS-imputed (qfaminc>0)
generate miss_inc = inrange(faminc, 995, 999) | (qfaminc > 0 & qfaminc != .)

* 1) identify the HH head's family: number of heads (relate==101) in each family unit
bysort yr_month asecflag serial famnum: egen hfam = total(relate == 101)


********************************************************************************
** Appendix Table 1: Sample Size (# individuals/households)
* Individuals: monthly counts under successively tighter sample restrictions
*   obs1 non-zero weight; obs2 + 1st/5th interview month; obs3 + head's family;
*   obs4 + income not missing/imputed
generate n = 1
bysort yr_month: egen obs1 = total(n) if wtfinl != 0
bysort yr_month: egen obs2 = total(n) if wtfinl != 0 & (mish == 1 | mish == 5)
bysort yr_month: egen obs3 = total(n) if wtfinl != 0 & (mish == 1 | mish == 5) & hfam == 1
bysort yr_month: egen obs4 = total(n) if wtfinl != 0 & (mish == 1 | mish == 5) & hfam == 1 & miss_inc == 0

** Households: keep one record per household, preferring a member of the head's family
preserve
gsort yr_month asecflag serial -hfam famnum pernum
bysort yr_month asecflag serial: keep if _n == 1
bysort yr_month: egen obs5 = total(n) if wtfinl != 0
bysort yr_month: egen obs6 = total(n) if wtfinl != 0 & (mish == 1 | mish == 5)
bysort yr_month: egen obs7 = total(n) if wtfinl != 0 & (mish == 1 | mish == 5) & hfam == 1 & miss_inc == 0
* The counts are constant within month, so the weighted mean just carries them through
collapse obs5 obs6 obs7 [w=wtfinl], by(yr_month)
tempfile AT1
save `AT1', replace
restore


* Combine individual and household counts, transpose, and write the table
preserve
collapse obs* [w=wtfinl], by(yr_month)
merge 1:1 yr_month using `AT1', nogenerate
xpose, varname clear
order _varname
export excel using App.T1.xlsx, firstrow(var) replace
restore

* The survey weights are adjusted so that the sum of the weights in our sample
* represents the total U.S. population: adjustment factor of 5.7 (footnote 17).
* fwgt = weight total over all records; awgt = weight total over the analysis sample
* (1st/5th interview month, head's family, income not missing/imputed).
preserve
egen fwgt = total(wtfinl)
egen awgt = total(wtfinl) if (mish == 1 | mish == 5) & hfam == 1 & miss_inc == 0
summarize fwgt awgt
restore


********************************************************************************
*** Appendix Table 2: Demographic and Economic Characteristics by Interview Month
preserve
* Restrict to Feb 2020 onward, the household head's family, and non-zero weights
drop if yr_month < 202002
keep if hfam == 1
drop if wtfinl == 0

* Interview-month group: 1 = mish 1, 2 = mish 5, 3 = every other mish value
generate int_mo = cond(mish == 1, 1, cond(mish == 5, 2, 3))

* Weighted share of records with missing/imputed income, by month and interview group
bysort yr_month int_mo: egen missinc = total(miss_inc*wtfinl)
bysort yr_month int_mo: egen totwgt = total(wtfinl)
replace missinc = missinc/totwgt

* From here on only records with reported income are used
drop if miss_inc == 1

generate male = sex == 1
* Head's marital status (marst 1 or 2 counted as married)
bysort yr_month asecflag serial famnum: egen head_married = max((marst == 1 | marst == 2) & relate == 101)


* Family type, mutually exclusive
generate s_kids  = head_elderly == 0 & head_married == 0 & nchild >= 1   // Single Parent
generate m_kids  = head_elderly == 0 & head_married == 1 & nchild >= 1   // Married Parent
generate s_indv  = head_elderly == 0 & head_married == 0 & nchild == 0   // Single Individuals
generate m_nokid = head_elderly == 0 & head_married == 1 & nchild == 0   // Married w/o Children
generate elderly = head_elderly == 1                                     // Head 65 and Over


* Race
generate white = race == 100              // white
generate black = race == 200              // black
generate other = race != 100 & race != 200

* Education
generate hs_dropout   = edu < 73
generate hs_degree    = edu == 73
generate some_college = edu >= 74 & edu < 110
generate bachelor     = edu >= 110 & edu != .

* Equivalence scale and family-income bracket indicators (faminc codes)
generate scale   = ((fam_size - nchild + .7*nchild)^.7)
generate inc1    = faminc == 100 | faminc == 210 | faminc == 300
generate inc12   = faminc == 430 | faminc == 470 | faminc == 500
generate inc23   = faminc == 600 | faminc == 710
generate inc34   = faminc == 720 | faminc == 730
generate inc45   = faminc == 740
generate inc56   = faminc == 820
generate inc67_  = faminc == 830
generate inc7_10 = faminc == 841
generate inc1015 = faminc == 842
generate inc15   = faminc == 843
generate inc_mean = 2.355*ifam_inc/scale

* Employed: empstat 10 or 12
generate emp = empstat == 10 | empstat == 12

* Cell size, then weighted means by month and interview group
bysort yr_month int_mo: generate obs = _N
collapse (mean) missinc male white black age fam_size nchild ///
    s_kids m_kids s_indv m_nokid elderly ///
    hs_dropout hs_degree some_college bachelor emp ///
    inc1 inc12 inc23 inc34 inc45 inc56 inc67_ inc7_10 inc1015 inc15 obs ///
    [w=wtfinl], by(yr_month int_mo)
sort yr_month
order int_mo
* Survey nonresponse rate by month and interview group
* (data from https://cps.ipums.org/cps/covid19.shtml)
* int_mo: 1 = mish 1, 2 = mish 5, 3 = all other interview months.
* The Feb 2020 condition is parenthesised because & binds tighter than | in Stata:
* without the parentheses every int_mo==2 row, in any month, would be seeded with 0.2.
gen srv_noresp = 0.2 if yr_month==202002 & (int_mo==1|int_mo==2)
replace srv_noresp = 0.17 if yr_month==202002 & int_mo==3
replace srv_noresp = 0.43 if yr_month==202003 & int_mo==1
replace srv_noresp = 0.31 if yr_month==202003 & int_mo==2
replace srv_noresp = 0.24 if yr_month==202003 & int_mo==3
replace srv_noresp = 0.53 if yr_month==202004 & int_mo==1
replace srv_noresp = 0.31 if yr_month==202004 & int_mo==2
replace srv_noresp = 0.26 if yr_month==202004 & int_mo==3
replace srv_noresp = 0.52 if (yr_month==202005|yr_month==202006) & int_mo==1
replace srv_noresp = 0.32 if (yr_month==202005|yr_month==202006) & int_mo==2
replace srv_noresp = 0.29 if yr_month==202005 & int_mo==3
replace srv_noresp = 0.33 if yr_month==202006 & int_mo==3
* Put the identifiers and the nonresponse rate first, transpose so that each
* month x interview-group cell becomes a column, and write the table
order yr_month int_mo srv_*
xpose, varname clear
order _varname
export excel using App.T2.xlsx, keepcellfmt firstrow(var) replace
restore


********************************************************************************
** Appendix Table 13: 1) survey mode, 2) missing income rate, 3) footnote 10
preserve
* Analysis sample: 1st/5th interview month, household head's family
keep if (mish==1|mish==5) & hfam==1

* interview type
tab inttype
gen inperson = inttype==1
* Weighted share of in-person interviews in June 2020.
* No by-prefix is used: with "bysort yr_month:" r() would hold the results of the
* last by-group that ran, which is the June figure only if June is the last month.
* r(mean) is saved in a local right away so later commands cannot overwrite it.
sum inperson [w=wtfinl] if yr_month==202006
local inperson_mean = r(mean)

putexcel set App.T13.xlsx, replace
* NOTE(review): the label says (%) but the value written is a 0-1 share - confirm
putexcel A1 = "in-person interview (%)"
putexcel B1 = `inperson_mean'

* survey nonresponse rate
* # HHs in 1st month: 3,716
* # HHs in 5th month: 5,144
* response rate in 1st month: 48.42
* response rate in 5th month: 68.35
* Nonresponse = 100 minus the household-weighted average of the two response rates
putexcel A2 = "Survey non-response rate"
local nonresponse = 100-((48.42*3716)+(68.35*5144))/8860
putexcel B2 = `nonresponse'


* missing income rate
* int_mo is 0 for every record here because only mish 1 and 5 were kept above,
* so the grouping below is effectively by yr_month alone
gen int_mo = mish!=1 & mish!=5
bysort yr_month int_mo: egen missinc = total(miss_inc*wtfinl)
bysort yr_month int_mo: egen totwgt = total(wtfinl)
replace missinc = missinc/totwgt
sum missinc [w=wtfinl] if yr_month==202006
local missinc_mean = r(mean)
putexcel A3 = "Missing income rate"
putexcel B3 = `missinc_mean'

* Footnote 10: composition of Other race group (May 2020, race neither 100 nor 200)
gen asian = race==651
gen twomore = race>=801
gen indian = race==300|race==652
bysort yr_month: sum asian twomore indian [w=wtfinl] if yr_month==202005 & race!=100 & race!=200
restore


********************************************************************************

*** Footnote 6:
** 1) predicting unemployment status using the non-analysis sample in the pre-COVID19 period
* From here on the data in memory are restricted (no preserve): head's family, reported income
keep if hfam == 1 & miss_inc == 0

* sample: 1 = 1st and 5th interview month, 2 = the rest
generate sample = cond(mish == 1 | mish == 5, 1, 2)
* precovid: 1 = Jan.-Feb. 2020, 2 = April-June; March 2020 is excluded
drop if yr_month == 202003
generate precovid = cond(yr_month <= 202002, 1, 2)

* Head's marital status
bysort yr_month asecflag serial famnum: egen head_married = max((marst == 1 | marst == 2) & relate == 101)

* Regressors var2-var14
generate var2  = sex == 1
generate var3  = race == 100                                           // white
generate var4  = race == 200                                           // black
generate var5  = age
generate var6  = fam_size
generate var7  = nchild
generate var8  = head_elderly == 0 & head_married == 0 & nchild >= 1   // Single Parent
generate var9  = head_elderly == 0 & head_married == 1 & nchild >= 1   // Married Parent
generate var10 = head_elderly == 0 & head_married == 0 & nchild == 0   // Single Individuals
generate var11 = head_elderly == 0 & head_married == 1 & nchild == 0   // Married w/o Children
generate var12 = edu < 73
generate var13 = edu == 73
generate var14 = edu >= 74 & edu < 110

* Unemployed: empstat 21 or 22; left missing where empstat is 0
generate unemp = (empstat == 21 | empstat == 22) if empstat != 0

* Fit on the pre-COVID, non-analysis sample, then predict for every record
regress unemp var* [w=wtfinl] if precovid == 1 & sample == 2
predict pred_unemp
replace pred_unemp = . if empstat == 0
summarize unemp pred_unemp [w=wtfinl] if precovid == 1 & sample == 2


* Cell size for each period x sample group (kept in the data after the export below)
bysort precovid sample: generate obs = _N

* Weighted means by period and sample group, transposed so each group is a column
preserve
collapse var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13 var14 ///
    pred_unemp obs [w=wtfinl], by(precovid sample)
sort precovid sample
order precovid sample
xpose, varname clear
order _varname
export excel using ExtraT2.xlsx, firstrow(var) replace // row 17 reports predicted unemployment status
restore

** 2) joint tests that the demographic characteristics are equal across groups
**    (post- vs. pre-COVID19 period, and 1st/5th interview month vs. other months)
svyset [pw=wtfinl]
* Characteristics compared in every test below
local demo var2 var3 var4 var5 var6 var7 var8 var9 var10 var11 var12 var13 var14
* 1st and 5th month: pre- vs. post-COVID19
mvtest means `demo' [w=wtfinl] if sample==1, by(precovid)
* other months: pre- vs. post-COVID19
mvtest means `demo' [w=wtfinl] if sample==2, by(precovid)
* pre-COVID19 period: 1st and 5th month vs. other months
mvtest means `demo' [w=wtfinl] if precovid==1, by(sample)
* post-COVID19 period: 1st and 5th month vs. other months
mvtest means `demo' [w=wtfinl] if precovid==2, by(sample)


/*
test statistical signficance of each characteristic 
rename pred_unemp var15
forvalues n = 2/14 {
	forvalues i = 1/2 {
		preserve
		keep if sample==`i'
		svy: mean var`n', over(precovid) coeflegend
		lincom _b[c.var`n'@1bn.precovid] -  _b[c.var`n'@2.precovid]
		putexcel set ExtraT2.xlsx, sheet(sample`i') modify
		putexcel A`n' = "Difference btw pre- and post-covid period. sample1: 1, 5th month, sample2: the rest months" 
		putexcel B`n' = `r(estimate)'
		putexcel C`n' = `r(p)'
		putexcel D`n' = `r(se)'

		restore
		
		
	}
}


forvalues n = 2/14 {
	forvalues i = 1/2 {
		preserve
		keep if precovid==`i'
		svy: mean var`n', over(sample) coeflegend
		lincom _b[c.var`n'@1bn.sample] -  _b[c.var`n'@2.sample]
		putexcel set ExtraT2.xlsx, sheet(period`i') modify
		putexcel A`n' = "Difference btw 1,5th month vs. the rest months. precovid1: April-June, precovid2: Jan-Feb" 
		putexcel B`n' = `r(estimate)'
		putexcel C`n' = `r(p)'
		putexcel D`n' = `r(se)'
		restore
		
		
	}
}


